# -*- coding: utf-8 -*-
"""
@Time    : 2024/7/10 19:37 
@Author  : ZhangShenao 
@File    : document_loader.py 
@Desc    : 文档加载器
"""
import os

from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader


class DocumentLoader:
    """
    Load documents from the files located directly inside a base directory.

    Supported formats are PDF (``.pdf``), Word (``.docx``) and plain text
    (``.txt``). Extensions are matched case-insensitively; entries with any
    other extension, and entries that are not regular files, are ignored.
    """

    def __init__(self, base_path):
        """
        Create a loader.

        :param base_path: directory whose files will be loaded
        """
        self.__base_path = base_path

    def _loader_for(self, file_path):
        """
        Pick the loader that matches the extension of ``file_path``.

        :param file_path: path of the candidate file
        :return: a loader instance exposing ``load()``, or ``None`` when the
                 extension is not supported
        """
        # Compare on a lower-cased copy so 'REPORT.PDF' is treated like
        # 'report.pdf'; the loader itself still receives the original path.
        lowered = file_path.lower()
        if lowered.endswith('.pdf'):
            return PyPDFLoader(file_path)
        if lowered.endswith('.docx'):
            return Docx2txtLoader(file_path)
        if lowered.endswith('.txt'):
            return TextLoader(file_path)
        return None

    def load_docs(self) -> list:
        """
        Load every supported document found in the base directory.

        Files are processed in sorted name order so the result is
        deterministic. A file that fails to load is reported and skipped; it
        does not discard the documents already loaded from other files.

        :return: list of loaded documents; empty if the directory cannot be
                 listed or contains no loadable file
        """
        try:
            # os.listdir returns entries in arbitrary order; sort for a
            # reproducible result.
            file_names = sorted(os.listdir(self.__base_path))
        except Exception as e:
            print(f'load documents error: {e}')
            return []

        docs = []
        for file_name in file_names:
            file_path = file_name
            # Errors are isolated per file: one corrupt document must not
            # throw away everything that was loaded successfully.
            try:
                file_path = os.path.join(self.__base_path, file_name)
                if not os.path.isfile(file_path):
                    # Skip sub-directories (even ones named like 'x.txt').
                    continue
                loader = self._loader_for(file_path)
                if loader is not None:
                    docs.extend(loader.load())
            except Exception as e:
                print(f'load documents error: {file_path}: {e}')
        return docs
